Imports and set up¶

In [7]:
import requests
import numpy as np
import seaborn as sns
import json
import matplotlib.pyplot as plt
import pandas as pd
import holoviews as hv
from holoviews import opts, dim
from bokeh.sampledata.les_mis import data

hv.extension('bokeh')
hv.output(size=200)

I formulate the query to the GraphQL API implemented by the subgraph TetherUSDT: in detail, I want to retrieve the following features with regard to issues, transfers and approvals.

In [39]:
query = """query {
	issues(orderBy: amount, orderDirection: asc) {
    	id
    	amount
 	}
	transfers {
		id
		from	
		to
		value
	}
	approvals {
		id
		owner	
		spender
		value
	}
}"""

I use a temporary query url instead of the real query URL with the generated API key (https://gateway.testnet.thegraph.com/api/e0e91a841b861b1b22da614b4d6ef4b7/subgraphs/id/EW29ZpxgJRNUzjW5qs1KRyijWh2t171WmgNJ8sNmN7P5), since the latter method requires the adding of some GRT to my billing balance.

In [40]:
url = "https://api.studio.thegraph.com/query/16935/tetherusdt/v0.0.3"
r = requests.post(url, json={'query': query})
print(r.status_code)
#print(r.text)
200
In [41]:
json_data = json.loads(r.text)

Issues¶

In [5]:
df_issues = pd.DataFrame(json_data["data"]["issues"])
df_issues['amount'] = pd.to_numeric(df_issues['amount'])

The first 5 records of issues DataFrame.

In [14]:
df_issues.head()
Out[14]:
id amount
0 0x2ec30181b26f842558280b682a5a58e15cae6498b4c1... 8005642000
1 0x8cfc4f5f4729423f59dd1d263ead2f824b3f133b02b9... 10000000000
2 0x27b0df3879a34fff8abd827eb0a05892041af7763a88... 10000000000000
3 0x31f01e3f69d763c70e9965c370475f454338effdcca4... 15000000000000
4 0x79595df1a5d8b96c017cd78bbd844fb68c94c19edb55... 15000000000000
In [6]:
values = list(df_issues['amount'].value_counts().sort_index().keys())
frequencies = df_issues['amount'].value_counts().sort_index()
In [11]:
plt.figure(figsize=(35,10))
ax = sns.barplot(x=values, y=frequencies)
ax.set_xticklabels(ax.get_xticklabels(),rotation = 30)
xlabels = [j//1000000000 for j in values]
ax.set_xticklabels(xlabels)
plt.title("Bar plot of the frequencies of issues amounts (in billion)")
sns.set(font_scale = 6)
plt.show()

Approvals¶

In [42]:
df_approvals = pd.DataFrame(json_data["data"]["approvals"])

The value feature assumes incredibly high number.

In [43]:
df_approvals['value'][0]
Out[43]:
'115792089237316195423570985008687907853269984665640564039457584007913129639935'

In order to plot these values, I try to manage their magnitude through standardization (min-max scaling), however I discover that the best approach is approximate each amount with the number of its digits.

In [44]:
values_length = [len(i) for i in df_approvals['value']]
In [46]:
plt.figure(figsize=(30,6))
sns.histplot(values_length, discrete=True)
plt.title("Frequency distribution of values' length")
sns.set(font_scale = 0.5)
plt.show()
In [47]:
df_approvals['value'] = values_length
In [54]:
graph = hv.Sankey(df_approvals.iloc[:, 1:])
graph.opts(
    opts.Sankey(label_position='left', width=600, height=1100, cmap='Set1',
                edge_color=dim('spender').str(), node_color=dim('spender').str()))
Out[54]:

Transfers¶

In [ ]:
df_transfers = pd.DataFrame(json_data["data"]["transfers"])
df_transfers['value'] = pd.to_numeric(df_transfers['value'])

The first 5 records of transfers DataFrame.

In [ ]:
df_transfers.head()
id from to value
0 0x00000112ed5c1d8741bb77c2b0920758ea29bc1c567a... 0xcf3618d4680817af786a1d93465a19ab4225e69e 0xd331227a7fe6682a93e8fa07700779f11996dd3a 527000000
1 0x0000018b18fd871bd95081b75f2c1b18d9029c70026d... 0xa4cbedac57819876f8fb0f69bb6cc3d7a61c4368 0xe1f067117527457ddca0209b0bbb930268d1be16 130000000
2 0x000002b2cff6dc4e25c875127befcbd091cd7ccceed4... 0x99e462e3de92728b2a57a7b4b83a58dba98de71a 0x32cd0b721fa6999af38d674220eb5daabf383e44 495000000
3 0x000002c8f9106b692c0d06fc042d60fa124bd0731a82... 0xe0c1582a5cd193172624658ed0abeecea24835ad 0x3eca50bc396cd1464f61e3fe0bb6506d45c90c53 173002900
4 0x000002ccee593d698e68923156b131d44518cb7cb38b... 0x3a71e3b4a01221e66007d7b1d3f8566b788edac6 0x502a76d02dfaeb9a7907a4e4b28fb66519ba7d60 1498500000
In [ ]:
chord = hv.Chord(df_transfers.iloc[:, 1:])
chord.opts(
	opts.Chord(cmap='Set1', edge_cmap='Set1', edge_color='from', labels='from', node_color='from', width=600, height=600))

The GraphQL query language presents high flexibility regarding query constructs: for instance, I can query a subset of the available features of transfers filtering by a specific source address.

In [49]:
query_filtered = """query {
	transfers(where: {from: "0xfdb16996831753d5331ff813c29a93c76834a0ad"})  {
		from	
		to
		value
	}
}"""

r = requests.post(url, json={'query': query_filtered})
print(r.status_code)
#print(r.text)
json_data = json.loads(r.text)
df_transfers_filtered = pd.DataFrame(json_data["data"]["transfers"])
200
In [50]:
df_transfers_filtered
Out[50]:
from to value
0 0xfdb16996831753d5331ff813c29a93c76834a0ad 0xae844ed72c0f0bb362c39a98b00ac7cefbb712bc 1415430594
1 0xfdb16996831753d5331ff813c29a93c76834a0ad 0x70865473b490d87e9080820b0a9c28509b812d43 274862068
2 0xfdb16996831753d5331ff813c29a93c76834a0ad 0x8b6eb0e9c4bcb5e7377856625bb881015731f3c6 109349000000
3 0xfdb16996831753d5331ff813c29a93c76834a0ad 0x29aa34841ff82914f63e6c3e5fad22d2855fd1d1 129000000
4 0xfdb16996831753d5331ff813c29a93c76834a0ad 0x74e821fa3af6fdd075490832ce1742b117f5d2d1 690000000
... ... ... ...
95 0xfdb16996831753d5331ff813c29a93c76834a0ad 0xfb3da7509082b1dcb5d1447fc51acca241e7159f 130000000
96 0xfdb16996831753d5331ff813c29a93c76834a0ad 0x957ff9cc9e514fb8c572835288cd8736841efb1d 306001363
97 0xfdb16996831753d5331ff813c29a93c76834a0ad 0xb0477a0c68a548e8d4a0017b5bd8959218d0d94a 14683287812
98 0xfdb16996831753d5331ff813c29a93c76834a0ad 0x1be889576c1a4effd0acc032c1a6971438ce966b 14044903954
99 0xfdb16996831753d5331ff813c29a93c76834a0ad 0x4ba4adc2d04935236c604a5b9eb804577f2ebc49 202000000

100 rows × 3 columns